package com.chb.flink.source

import org.apache.flink.api.scala.{AggregateDataSet, ExecutionEnvironment}

object HdfsFileSource {
    /**
     * Batch word-count job over a text file stored on HDFS.
     *
     * Reads the file line by line, splits each line on commas, and counts
     * how many times each token occurs, printing the result to the console.
     */
    def main(args: Array[String]): Unit = {
        // Initialize Flink's batch (DataSet) execution environment.
        val env = ExecutionEnvironment.getExecutionEnvironment
        // Single parallel task so the printed output is not interleaved.
        env.setParallelism(1)

        // Implicit TypeInformation for the *batch* DataSet API.
        // FIX: the original imported the streaming implicits
        // (org.apache.flink.streaming.api.scala._), which do not match the
        // DataSet operators used below and break type-information derivation.
        import org.apache.flink.api.scala._

        // Read the source file from HDFS.
        // NOTE(review): host/port and path are hard-coded — presumably a demo;
        // consider taking them from args in production.
        val data = env.readTextFile("hdfs://ShServer:9000/tmp/README.txt")

        // Classic word count: tokenize on commas, pair each token with 1,
        // group by the token (tuple field 0) and sum the counts (field 1).
        val result: AggregateDataSet[(String, Int)] = data.flatMap(_.split(","))
            .map((_, 1))
            .groupBy(0)
            .sum(1)

        // print() triggers job execution and writes the counts to stdout.
        result.print()
    }
}